##### ####################################################
#####                                               ######
#####             Direct references prep             
#####                                               ######
##### ####################################################

# Clear every object from the current environment so that nothing left over
# from an earlier session can be picked up by the code below.
rm(list = ls())

# Fix the random-number seed so that any random draws made later in the
# script are repeatable between runs.
set.seed(221186)

# Load libraries

library(data.table) # 1.11.4
library(igraph) # 1.2.2

# Load data

# Restores the objects stored in the .Rdata file into the workspace. The code
# below relies on it providing a data.table called `speeches`.
load("data/speeches.Rdata")

# Process data

# Keep only rows flagged as minister_present. `TRUE` is spelled out because
# `T` is an ordinary, re-bindable name and `[.data.table` would resolve it
# against a column called `T` first if one existed.
speeches <- speeches[minister_present == TRUE]

# Re-base hpos so that the smallest value within each subsection becomes 1.
speeches[, hpos.new := hpos - (min(hpos) - 1), by = subsection_id]

# Min-max rescale hpos.new to the [0, 1] interval within each section.
# NOTE(review): a section whose rows all share one hpos.new value has a zero
# range, so this yields NaN (0 / 0) for those rows.
# NOTE(review): the re-basing above groups by subsection_id while this
# rescaling groups by section_id - confirm the different grouping is intended.
speeches[, hpos.zero.one := {
  lowest <- min(hpos.new)
  highest <- max(hpos.new)
  (hpos.new - lowest) / (highest - lowest)
}, by = section_id]

# Collapse the speech-level table to one row per speaker per debate
# (subsection_id x twfy_person_id).
speech_refs <- speeches[, .(
  # All of the speaker's text in the debate, joined end to end with
  # punctuation removed so that it can be searched for names/constituencies.
  body = gsub("[[:punct:]]", "", paste(body, collapse = "")),
  # Constituency with punctuation removed in the same way as the body.
  constituency = gsub("[[:punct:]]", "", unique(constituency)),
  # Speaker- and debate-level attributes carried through via unique().
  name = unique(name),
  Gender = unique(Gender),
  minister_gender = unique(minister_gender),
  # Averages of the pre-existing network scores over the speaker's speeches.
  mean_page_rank = mean(page_rank),
  mean_eigen = mean(eigen),
  # TRUE when any of the speaker's rows is flagged minister_in_debate.
  minister_in_debate = any(minister_in_debate),
  minister_name = unique(minister_name),
  gov = unique(gov),
  yearmon = unique(yearmon),
  hdate = unique(hdate),
  member_id = unique(member_id),
  # Total words spoken by the speaker in the debate.
  sum_word_count = sum(word_count),
  debate_department = unique(debate_department),
  Party = unique(party_short),
  parent = unique(parent),
  # TRUE when any of the speaker's rows is flagged is_speaker.
  speaker = any(is_speaker),
  # Average rescaled position of the speaker's speeches.
  mean_hpos.zero.one = mean(hpos.zero.one)
), by = .(subsection_id, twfy_person_id)]

# Set up containers for storing output

# One column per network score, filled in debate by debate further down.
# They are initialised with NA_real_ rather than a bare NA: a bare NA creates
# a *logical* column, and data.table coerces the right-hand side to the
# column's existing type when only some rows are assigned, which would turn
# numeric scores into logical values. (The earlier zero-filled vectors were
# overwritten immediately and have been dropped.)
speech_refs$reference_page_rank <- NA_real_
speech_refs$reference_degree <- NA_real_
speech_refs$reference_eigen <- NA_real_
speech_refs$reference_hub <- NA_real_
speech_refs$reference_auth <- NA_real_

# Flag set to TRUE for every speaker in a debate that contains at least one
# detected reference.
speech_refs$any_references <- FALSE

# Loop over debates, finding direct mentions of each MP/their constituency.
#
# For every debate (subsection_id) with more than one speaker this block
#   1. builds a speaker-by-speaker 0/1 matrix whose entry [i, k] is 1 when
#      speaker k's text contains speaker i's constituency, name or (for rows
#      flagged minister_in_debate) the relevant ministerial title;
#   2. stores in reference_list[[j]][[i]] the id of speaker i followed by the
#      ids of the speakers whose text mentions them;
#   3. computes centrality scores on the resulting graphs and writes them
#      into the reference_* columns of speech_refs.
# Debates with a single speaker keep an empty list and NA scores.

# Indices of `texts` that contain `pattern`. A missing or empty pattern
# returns no hits (an empty regular expression would match every text).
find_mentions <- function(pattern, texts, ignore_case = FALSE) {
  if (is.na(pattern) || !nzchar(pattern)) {
    return(integer(0))
  }
  grep(pattern, texts, ignore.case = ignore_case)
}

# Title searched for in the text, keyed by debate department; any other
# department falls back to "Secretary of State".
minister_titles <- c(
  "Prime Minister" = "Prime Minister",
  "Chancellor of the Exchequer" = "Chancellor",
  "Foreign" = "Foreign Secretary",
  "Home" = "Home Secretary"
)

# data.table does not change a column's type when only some rows are
# assigned, so make sure the score columns are double before filling them.
score_cols <- c("reference_page_rank", "reference_degree", "reference_eigen",
                "reference_hub", "reference_auth")
for (score_col in score_cols) {
  if (!is.double(speech_refs[[score_col]])) {
    set(speech_refs, j = score_col,
        value = as.numeric(speech_refs[[score_col]]))
  }
}

# Work out once which rows belong to which debate instead of re-scanning the
# whole table on every iteration.
debate_ids <- unique(speech_refs$subsection_id)
n_debates <- length(debate_ids)
rows_by_debate <- split(seq_len(nrow(speech_refs)),
                        match(speech_refs$subsection_id, debate_ids))

reference_list <- vector("list", n_debates)
start <- proc.time()

for (j in seq_along(debate_ids)) {
  elapsed_time <- proc.time() - start
  reference_list[[j]] <- list()

  # Progress report: rough time remaining every 100 debates, a dot otherwise.
  if (j %% 100 == 0) {
    cat(paste0("\n", round((elapsed_time[3] / j * (n_debates - j)) / 60), " minutes remaining.\n"))
  }
  cat(".")

  debate_rows <- rows_by_debate[[j]]
  this_debate <- speech_refs[debate_rows]

  # A debate with a single speaker cannot contain references between speakers.
  if (nrow(this_debate) < 2) next

  speaker_ids <- this_debate$twfy_person_id
  n_speakers <- length(speaker_ids)

  cons_in_debate <- this_debate$constituency
  ref_adj_mat <- matrix(0, n_speakers, n_speakers,
                        dimnames = list(as.character(speaker_ids),
                                        as.character(speaker_ids)))
  names_adj_mat <- ref_adj_mat

  # The speech text had its punctuation removed, so do the same to the names
  # (otherwise e.g. hyphenated names can never match), then drop honorifics.
  names_in_debate <- gsub("[[:punct:]]", "", this_debate$name)
  names_in_debate <- gsub("Rt Hon |Mr |Mrs |Miss |Ms |Dr |Sir ", "", names_in_debate)

  for (i in seq_len(n_speakers)) {

    # Does anyone mention this constituency in the debate?
    constituency_hits <- find_mentions(cons_in_debate[i], this_debate$body,
                                       ignore_case = TRUE)
    ref_adj_mat[i, constituency_hits] <- 1

    # Does anyone mention this MP's name in the debate?
    name_hits <- find_mentions(names_in_debate[i], this_debate$body)
    names_adj_mat[i, name_hits] <- 1

    # Everyone who mentions speaker i by constituency, by name, or both
    # (`> 0`, so that a speaker matching on both counts is not dropped).
    mentioned_by <- names(which((ref_adj_mat[i, ] + names_adj_mat[i, ]) > 0))
    reference_list[[j]][[i]] <- c(speaker = speaker_ids[i], mentioned_by)
  }

  # Does anyone mention the Prime Minister, the Chancellor, the Foreign
  # Secretary, the Home Secretary or the Secretary of State in the debate?
  # Only the first department value is used; NA or unlisted departments fall
  # back to the default title.
  minister_to_find <- "Secretary of State"
  title_index <- match(unique(this_debate$debate_department)[1],
                       names(minister_titles))
  if (!is.na(title_index)) {
    minister_to_find <- minister_titles[[title_index]]
  }

  minister_rows <- which(this_debate$minister_in_debate)
  title_hits <- grep(minister_to_find, this_debate$body)
  names_adj_mat[minister_rows, title_hits] <- 1

  # Add the speakers who used the title to each minister's reference entry.
  # Looping copes with debates that have no, or several, flagged ministers.
  for (m in minister_rows) {
    entry <- reference_list[[j]][[m]]
    reference_list[[j]][[m]] <- c(entry[1],
                                  unique(c(entry[-1], speaker_ids[title_hits])))
  }

  ## Combine the ref matrix and the names matrix
  ref_adj_mat <- ref_adj_mat + names_adj_mat
  ref_adj_mat[ref_adj_mat > 0] <- 1

  # NOTE(review): rows are the person mentioned and columns the person doing
  # the mentioning, so directed edges presumably run from the mentioned MP to
  # the mentioner - confirm this is the intended direction.
  # NOTE(review): the matrix is generally not symmetric; check how
  # mode = "undirected" treats that in the igraph version in use (mode = "max"
  # keeps a tie when either speaker mentions the other).
  mygraph.directed <- graph_from_adjacency_matrix(ref_adj_mat, diag = FALSE, mode = "directed", weighted = NULL)
  mygraph.undirected <- graph_from_adjacency_matrix(ref_adj_mat, diag = FALSE, mode = "undirected", weighted = NULL)

  V(mygraph.directed)$name <- speaker_ids
  V(mygraph.undirected)$name <- speaker_ids

  # Calculate the page rank score
  page_rank_vec <- page_rank(mygraph.directed, directed = TRUE)$vector

  # Calculate the eigenvector centrality scores
  eigen_vec <- eigen_centrality(mygraph.undirected, directed = FALSE)$vector

  # Calculate the hub and authority scores
  hub_vec <- hub_score(mygraph.directed)$vector
  auth_vec <- authority_score(mygraph.directed)$vector

  # Calculate degree scores. degree() is called with its default mode, which
  # counts incoming and outgoing edges together.
  # NOTE(review): pass mode = "out" if out-degree was what was wanted.
  degree_vec <- degree(mygraph.directed)

  # The scores must line up with the rows they are written back to.
  if (!all(speaker_ids == names(page_rank_vec))) stop("Something went wrong.")

  # Write the scores back by reference (no copy of the whole table).
  set(speech_refs, i = debate_rows, j = "reference_page_rank", value = as.numeric(page_rank_vec))
  set(speech_refs, i = debate_rows, j = "reference_eigen", value = as.numeric(eigen_vec))
  set(speech_refs, i = debate_rows, j = "reference_degree", value = as.numeric(degree_vec))
  set(speech_refs, i = debate_rows, j = "reference_hub", value = as.numeric(hub_vec))
  set(speech_refs, i = debate_rows, j = "reference_auth", value = as.numeric(auth_vec))

  if (any(ref_adj_mat == 1)) {
    set(speech_refs, i = debate_rows, j = "any_references", value = TRUE)
  }
}

# Write the per-speaker table and the list of who-mentions-whom to disk.
save(list = c("speech_refs", "reference_list"),
     file = "working/speech_refs.Rdata")

